{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a9ac2491-124b-4032-8a58-70b9613841c8",
   "metadata": {},
   "source": [
    "# Sample quality control"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ed7cef2-9031-47e1-8d51-6191baba5cd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pylluminator.samples import Samples\n",
    "from pylluminator.utils import set_logger\n",
    "from pylluminator.annotations import Channel\n",
    "# explicit names instead of `import *`, so it is clear where each QC function comes from\n",
    "from pylluminator.quality_control import (\n",
    "    detection_stats,\n",
    "    intensity_stats,\n",
    "    nb_probes_stats,\n",
    "    type1_color_channels_stats,\n",
    "    dye_bias_stats,\n",
    "    betas_stats,\n",
    ")\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "\n",
    "set_logger('WARNING') # set the verbosity level, can be DEBUG, INFO, WARNING, ERROR"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3a036692-253f-40db-b5c0-dab9007db71b",
   "metadata": {},
   "source": [
    "## Load pylluminator Samples\n",
    "\n",
    "We assume that you have already processed the .idat files according to your preferences and saved them. If not, please refer to notebook `1 - Read data and get beta values` before going any further."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0170564a-0d57-4cd5-8c5e-5d779dd1bb78",
   "metadata": {},
   "outputs": [],
   "source": [
    "my_samples = Samples.load('preprocessed_samples')\n",
    "my_samples"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "361773da-c388-4f3b-84de-f1a83e073b55",
   "metadata": {},
   "source": [
    "## Choose a sample and print QCs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00eb6ccd-4987-4d84-ae25-c8c010d512a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "sample_name = 'LNCAP_500_3'\n",
    "detection_stats(my_samples, sample_name)\n",
    "intensity_stats(my_samples, sample_name)\n",
    "nb_probes_stats(my_samples, sample_name)\n",
    "type1_color_channels_stats(my_samples, sample_name)\n",
    "dye_bias_stats(my_samples, sample_name)\n",
    "betas_stats(my_samples, sample_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fc19e3c4-e208-4b65-a45a-e100bb3f1256",
   "metadata": {},
   "source": [
    "## Plot the number of beads per probe"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "312542d6-00b4-49a2-8d22-f2738d43378e",
   "metadata": {},
   "source": [
    "This will work only if you have kept the .idat data when reading the samples (parameter `keep_idat=True`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aad3e01b-35fe-46c0-9e9b-6ab72f3ed9d5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# smallest number of probes a bin must contain to be displayed\n",
    "min_probes_per_bin = 5\n",
    "\n",
    "# number of rows of the red channel .idat data for each value of `n_beads`\n",
    "probes_per_n_beads = my_samples.idata[sample_name][Channel.RED].groupby('n_beads').size()\n",
    "\n",
    "# don't display the (very) long tail of the graph: keep only the bins that contain at least 5 probes\n",
    "five_probes_min = probes_per_n_beads[probes_per_n_beads >= min_probes_per_bin]\n",
    "\n",
    "# plot\n",
    "fig, ax = plt.subplots(figsize=(10, 7))\n",
    "ax.bar(five_probes_min.index, five_probes_min.values)\n",
    "ax.set_title(f'Number of beads per probe - {sample_name}')\n",
    "ax.set_xlabel('Number of detected beads per probe')\n",
    "_ = ax.set_ylabel('Probes count')"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}